clear all 
set more off 


*---------------------------------------------------------------------*
* TSIV RESULTS 
*---------------------------------------------------------------------*

* Bring in data
	use "$Mydirectory1/3_Output/TSIV_2Samples_1940Census.dta", clear 
	drop log_*

	forval i=1(1)2 {
		gen est_`i'=. 
		gen est_lb_`i' =.
		gen est_ub_`i' =.
	}
	
	tempfile fulldata 
	save `fulldata'
	
*-------------------------------------*
* Regressions  *
*-------------------------------------*

* Loop through each decade 		
	foreach m in 1 2 3 4 {
		forval i=1920(10)1930 {
		
			use `fulldata', clear
			noisily display "Year `i', group `m'"
			
		* Restrict sample and set up triplets
			keep if survey1936==1 | (census==1 & problem_occs!=1) | (surveys==1 & decade==`i') 
			replace census=1 if survey1936==1

			if "`m'"=="1" {
				keep if census==1 | (surveys==1 & sex==1 & race==1) 
			}
			if "`m'"=="2" {
				keep if census==1 | (surveys==1 & race==1)
			}
			if "`m'"=="3" {
				keep if census==1 | (surveys==1 & (race==1 | (race==2 & sex==1)))
			}
			if "`m'"=="4" {
				count //not dropping anyone
			}
				
			local var_list "fatheroccej race south_merge" 
			egen triplet = group(`var_list')
			
			bysort census triplet: gen tag = _n==1 //tag first observation of triplet
			bysort triplet: egen number_samples = sum(tag) 
			keep if number_samples==2 //keep if triplet is in both census and surveys
			
		* Remake the dummies for the triplets that exist in both datasets
			drop triplet* 
			egen triplet = group(`var_list')
			quietly tab triplet, gen(triplet_)
			local max = `r(r)'
			local max_minus1 = `r(r)'-1 //grabs # of dummies minus 1
			drop triplet_`max'
			
		* Save the dataset to be used for all regressions 
			tempfile restricted_data_`i'
			save `restricted_data_`i''
			
		*------------------------------*
		*------------------------------*
			
	    * 1. TSIV without any corrections --- naive approach
			
			//Naive version without standard error correction 
			quietly reg hh_income_1936fix triplet_*
			predict yhat_father 
			
			reg fam_inc_real yhat_father if surveys==1 [aw=wgt_sex_race], robust
			
	    /* 2. TSIV correcting standard errors based on two sample uncertainty
	          Source: (https://ars.els-cdn.com/content/image/1-s2.0-S0165176516302373-mmc1.pdf) */
			
			use `restricted_data_`i'', clear

	    * First stage  					
			keep if census==1
			gen const=1 
			gen x1 = hh_income_1936fix
			
			//robust 
			quietly reg x1 triplet_*, robust
			mat Vx2het = e(V)
			
			//non-robust
			quietly sureg (x1 = triplet_*)
			mat Vx2hom = e(V)
					
		* Second stage 
			use `restricted_data_`i'', clear
			keep if surveys==1
			gen y = fam_inc_real 
			
			predict x1h, equation(x1) //generate predicted x using Census results from sureg
			
			scalar kx = 1 //number of predicted variables
			scalar ke = 1 //number of exogenous variables (constant)
			
			quietly reg y triplet_* [aw=wgt_sex_race]
			mat Vy1hom = e(V)*e(df_r)/_N
			
			quietly reg y triplet_* [aw=wgt_sex_race], robust  
			mat Vy1het = e(V)*e(df_r)/_N
			
			/* TS2SLS estimator */
			reg y x1h [aw=wgt_sex_race]
			scalar coeff1 = _b[x1h]
			display coeff1
			mat b2s = e(b)
			mat b2sx = b2s[1,1..kx]
			
			/* Constructing C hat */
			quietly reg triplet_1 x1h [aw=wgt_sex_race]
			mat ch = e(b)'
			forval b=2(1)`max_minus1' {
			quietly reg triplet_`b' x1h [aw=wgt_sex_race]
			mat ch = ch,e(b)'	
			}
			mat ch = ch,(J(kx,ke,0)\I(ke))
			
			* Calculating non-robust standard errors 
			mat var1hom = ch*Vy1hom*ch' + (b2sx' # ch)*Vx2hom*(b2sx # ch')
			mat seb2shom = vecdiag(cholesky(diag(vecdiag(var1hom))))'
			
			* Calculating robust standard errors 
			mat var1het = ch*Vy1het*ch' + (b2sx' # ch)*Vx2het*(b2sx # ch')
			mat seb2shet = vecdiag(cholesky(diag(vecdiag(var1het))))'
			
			mat list seb2shom
			mat list seb2shet
			
			scalar se1=seb2shet[1,1]
			display se1
			
			scalar ub1 = coeff1 + (1.96*se1)
			scalar lb1 = coeff1 - (1.96*se1)
			
			* Bring back full dataset and save estimates 
			replace est_1 = coeff1 if decade==`i' 
			replace est_ub_1 = ub1 if decade==`i'
			replace est_lb_1 = lb1 if decade==`i'
			
						
		* Save
			bysort decade: keep if _n==1
			drop if decade==.
			keep est_* decade 
			gen group =`m'
			
			tempfile results_`i'_`m'
			save `results_`i'_`m''
			
		}
	}
	
* Append results and save
	use `results_1920_1'
	append using `results_1930_1'
	
	forval m==2(1)4 {
		append using `results_1920_`m''
		append using `results_1930_`m''
	}
	
	
	sort decade group 
	reshape long est_ est_lb_ est_ub_, i(decade group) j(estimate)
	
	replace decade= decade+1.5 if group==2
	replace decade= decade+3 if group==3
	replace decade= decade+4.5 if group==4
	
	save "$Mydirectory2/appendix_d/TSIV_results_1940_1936fix_levels_subgroups.dta", replace 
	